---
title: "From Hashtags To Ballots - Analysis of Instagram Data"
format: html
editor: visual
---

This document includes the analysis of our Instagram dataset.

## Setup 

We load the necessary libraries and the main dataset, which already includes the crowdsourced codings of the Instagram posts:

```{r}
library(tidyverse)
library(hrbrthemes)
library(scales)
library(lme4)
library(sjPlot)

# Main dataset: tab-separated file of Instagram posts together with their
# codings; every later chunk reads from `df_insta`.
df_insta <- read_tsv(file = "insta_posts_coded.tsv")
```

## Analysis for Main Paper

The following code computes the total number of likes and the total number of comments received by the posts in our sample:

```{r}
# Total engagement over all posts in the sample. No `na.rm` is set, so a
# missing value in either column makes the corresponding total NA.
sum(df_insta[["likes"]])
sum(df_insta[["comments"]])
```


The following code reproduces figure 2 in our main paper:

```{r fig.height=6, fig.width=9}
# Figure 2: distribution, across influencers, of the share of their posts that
# were coded as advertisement, political content, or party support/disapproval.

# Per-influencer (user_md5) share of posts in each category. A post counts
# towards `share_party` whenever `pos_neg_mode` is anything but "party_no".
agg_shares <- df_insta |>
  group_by(user_md5) |>
  summarize(
    obs = n(),
    share_sellout = sum(ads_mode == "yes") / obs,
    share_politics = sum(politics_mode == "yes") / obs,
    share_party = sum(pos_neg_mode != "party_no") / obs
  ) |>
  # Only influencers with at least five coded posts enter the figure.
  filter(obs >= 5) |>
  ungroup()

# Axis labels keyed by variable name, so a label can never be attached to the
# wrong category if the ordering of the discrete axis changes. Line breaks are
# explicit "\n" characters: a raw multi-line string literal would carry the
# source indentation into the label and inflate the axis-text width.
share_labels <- c(
  share_party = paste0(
    "Posts including support\n",
    "or disapproval of parties,\n",
    "politicians, or political events"
  ),
  share_politics = "Posts including\npolitical content",
  share_sellout = "Posts including\nadvertisement"
)

fig2 <- agg_shares |>
  pivot_longer(
    cols = c("share_sellout", "share_politics", "share_party")
  ) |>
  ggplot(aes(x = value, y = name)) +
  # `alpha` is a fixed setting and therefore lives outside aes(); inside aes()
  # it would be treated as a data mapping and rescaled by an alpha scale.
  geom_violin(aes(fill = name), alpha = 0.8, linewidth = 1) +
  geom_boxplot(width = 0.15, color = "black", fill = "white") +
  scale_y_discrete(labels = share_labels) +
  scale_fill_viridis_d() +
  scale_x_percent(breaks = pretty_breaks(n = 9)) +
  labs(y = NULL, x = "Percentage (by Influencer)") +
  theme_light(base_size = 14) +
  theme(
    legend.position = "none",
    plot.caption = element_text(face = "italic"),
    axis.text.y = element_text(size = 12),
    # No negative left margin is needed once the labels carry no padding.
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  )

fig2

# Save the figure explicitly (rather than via last_plot()) and make sure the
# target directory exists before writing.
dir.create("output", showWarnings = FALSE)
ggsave(
  "output/fig2.png",
  plot = fig2,
  bg = "white",
  units = "in",
  dpi = 300,
  width = 9,
  height = 6
)
```

And this code block creates figure 3 of our main paper:

```{r}


# Figure 3: weekly share of posts per category, with a smoothed trend per
# category and a marker labelled "BTW 2021".
# NOTE(review): "BTW 2021" presumably refers to the 2021 German federal
# election (Bundestagswahl) -- confirm.

# Legend labels keyed by category name so they cannot be mismatched.
category_labels <- c(
  party = "support / disapproval of political\nentities or events",
  politics = "including political content",
  sellout = "including advertisement"
)

weekly_shares <- df_insta |>
  mutate(
    date = ymd(post_date),
    # Derive the week from the parsed date rather than the raw column.
    week = week(date)
  ) |>
  group_by(user_md5) |>
  mutate(obs = n()) |>
  # Keep influencers with at least five posts and posts dated before
  # 2021-09-28.
  filter(obs >= 5 & date < ymd("2021-09-28")) |>
  ungroup() |>
  group_by(week) |>
  summarise(
    obs = n(),
    sellout = sum(ads_mode == "yes") / obs,
    politics = sum(politics_mode == "yes") / obs,
    # A post counts as "party" unless it was coded "party_no".
    party = sum(pos_neg_mode != "party_no") / obs
  ) |>
  pivot_longer(cols = c("sellout", "politics", "party"))

fig3 <- weekly_shares |>
  ggplot(aes(x = week, y = value, group = name, color = name)) +
  geom_point() +
  # LOESS is what geom_smooth() picks automatically for data of this size;
  # spelling it out pins the method and silences the informational message.
  geom_smooth(
    aes(fill = name),
    method = "loess",
    formula = y ~ x,
    alpha = 0.25,
    level = 0.95
  ) +
  # Fixed reference line and label: drawn once each, not once per data row,
  # and kept out of the colour mapping and the legend.
  geom_vline(xintercept = 38.5, linetype = 2) +
  annotate("text", label = "BTW 2021", x = 37.5, y = 0.53, size = 4.5) +
  scale_x_continuous(breaks = pretty_breaks(n = 7)) +
  scale_y_percent(breaks = pretty_breaks(n = 7)) +
  scale_color_viridis_d(labels = category_labels) +
  scale_fill_viridis_d(labels = category_labels) +
  labs(
    x = "Week of Year 2021",
    y = "Percentage of Posts",
    fill = "Category",
    color = "Category"
  ) +
  # Optional caption, currently disabled:
  #   caption = "Lines and uncertainty bands estimated by LOESS algorithm."
  theme_light(base_size = 14) +
  theme(
    legend.position = c(0.26, 0.72),
    legend.title = element_blank(),
    plot.caption = element_text(face = "italic"),
    legend.box.background = element_rect(colour = "grey50")
  )

fig3

# Save the figure explicitly (rather than via last_plot()) and make sure the
# target directory exists before writing.
dir.create("output", showWarnings = FALSE)
ggsave(
  "output/fig3.png",
  plot = fig3,
  bg = "white",
  units = "in",
  dpi = 300,
  width = 9,
  height = 6
)
```


## Analysis for Supplementary Material

Here, we fit two negative binomial mixed-effects models, one for the likes and one for the comments received by Instagram posts, each with a random intercept per user, as explained in the supplementary material:

```{r}
# Modelling data: numeric 0/1 indicators for the three coded categories, the
# two outcome counts, the user identifier, the tagging indicator and the day
# of the month. Rows with a missing value in any selected column are dropped.
dummy_df <- df_insta |>
  mutate(
    # 1 unless the post was coded "party_no"
    party_dummy = as.numeric(pos_neg_mode != "party_no"),
    # 1 unless the post was coded "no" for political content
    politics_dummy = as.numeric(politics_mode != "no"),
    # 1 unless the post was coded "no" for advertisement
    sellout_dummy = as.numeric(ads_mode != "no"),
    day = day(ymd(post_date))
  ) |>
  select(
    party_dummy, sellout_dummy, politics_dummy,
    likes, comments,
    user_md5, tags_dummy, day
  ) |>
  drop_na()

# Negative binomial mixed model for the number of likes, with a random
# intercept per user (user_md5). The bobyqa optimizer is allowed up to 2e5
# function evaluations.
l.nb <- glmer.nb(
  likes ~ party_dummy + politics_dummy + sellout_dummy + tags_dummy +
    (1 | user_md5),
  data = dummy_df,
  verbose = TRUE,
  control = glmerControl(
    optimizer = "bobyqa",
    optCtrl = list(maxfun = 2e5)
  )
)

# Same specification with the number of comments as the outcome.
c.nb <- glmer.nb(
  comments ~ party_dummy + politics_dummy + sellout_dummy + tags_dummy +
    (1 | user_md5),
  data = dummy_df,
  verbose = TRUE,
  control = glmerControl(
    optimizer = "bobyqa",
    optCtrl = list(maxfun = 2e5)
  )
)
```

The following code block visualizes results from the models using a forest plot:

```{r fig.height=6, fig.width=9}
# Figure S3: forest plot of the likes model (l.nb) and the comments model
# (c.nb). `transform = "exp"` asks sjPlot to exponentiate the estimates.
# NOTE(review): axis and legend labels are matched by position; confirm their
# order against the terms and models actually shown in the rendered plot.
fig_s3 <- plot_models(
  l.nb,
  c.nb,
  vline.color = "black",
  transform = "exp",
  axis.labels = c(
    "tags other users",
    "includes advertisement",
    "includes political content",
    "includes support / disapproval of political entities"
  )
) +
  theme_light(base_size = 14) +
  scale_y_continuous(limits = c(0, 2)) +
  scale_color_viridis_d(
    end = 0.9,
    labels = c("comments received", "likes received")
  ) +
  theme(
    legend.position = c(0.8, 0.3),
    legend.box.background = element_rect(colour = "grey50")
  ) +
  labs(title = NULL, x = "Covariates", color = "Dependent Variables")

fig_s3

# Save the figure explicitly (rather than via last_plot()) and make sure the
# target directory exists before writing.
dir.create("output", showWarnings = FALSE)
ggsave(
  "output/fig_s3.png",
  plot = fig_s3,
  bg = "white",
  units = "in",
  dpi = 300,
  width = 9,
  height = 6
)
```
